In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# The libraries above handle numerical computation, data manipulation and data visualization

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot
# Plotly library for building graphs
In [2]:
# Load TCS daily OHLCV price history; CSV expected in the working directory
tcs=pd.read_csv("TCSNS.csv")
tcs.head()  # preview the first 5 rows (cell's last expression is displayed)
Out[2]:
Date Open High Low Close Adj Close Volume
0 2018-04-12 1505.000000 1575.000000 1503.974976 1569.625000 1408.547119 6114360
1 2018-04-13 1575.000000 1620.400024 1566.050049 1576.650024 1414.850952 8179118
2 2018-04-16 1558.000000 1599.974976 1555.375000 1593.824951 1430.263306 6270274
3 2018-04-17 1593.824951 1597.474976 1573.599976 1583.300049 1420.818848 3301724
4 2018-04-18 1583.000000 1596.724976 1572.925049 1579.550049 1417.453369 2433066
In [3]:
# Column dtypes and non-null counts; note Date loads as object and is parsed in a later cell
tcs.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1235 entries, 0 to 1234
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1235 non-null   object 
 1   Open       1235 non-null   float64
 2   High       1235 non-null   float64
 3   Low        1235 non-null   float64
 4   Close      1235 non-null   float64
 5   Adj Close  1235 non-null   float64
 6   Volume     1235 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 67.7+ KB
In [4]:
# Convert the Date column from object (string) to datetime64 so date arithmetic works
tcs['Date']=pd.to_datetime(tcs['Date'])
In [5]:
# Report the covered date range and its span in calendar days.
date_min, date_max = tcs.Date.min(), tcs.Date.max()
print(f"Dataframe contains stock prices between {date_min} and {date_max}")
print(f"Total Days = {(date_max - date_min).days} Days")
Dataframe contains stock prices between 2018-04-12 00:00:00 and 2023-04-11 00:00:00
Total Days = 1825 Days
In [6]:
# Summary statistics for the numeric columns (count/mean/std/quartiles/min/max)
tcs.describe()
Out[6]:
Open High Low Close Adj Close Volume
count 1235.000000 1235.000000 1235.000000 1235.000000 1235.000000 1.235000e+03
mean 2686.566638 2713.224065 2657.218266 2684.913217 2559.095946 3.000685e+06
std 678.613932 681.461456 674.466401 678.072787 705.299841 1.906813e+06
min 1505.000000 1575.000000 1503.974976 1569.625000 1408.547119 1.445300e+05
25% 2064.050049 2084.974976 2040.200012 2059.025024 1899.312683 1.856920e+06
50% 2655.000000 2676.000000 2621.600098 2649.600098 2533.968750 2.540719e+06
75% 3293.425049 3326.000000 3262.300049 3290.900024 3205.845337 3.509598e+06
max 4033.949951 4043.000000 3980.000000 4019.149902 3885.789795 2.290380e+07
In [7]:
# create box plot to check the outliers
# (only the five price columns — Volume is ~1000x larger and would compress the price boxes)
tcs[["Open",'High','Low','Close','Adj Close']].plot(kind='box')
Out[7]:
<AxesSubplot:>
In [8]:
# Setting the Layout for our Plot using Plotly
layout=go.Layout(title='Stock Prices of TCS',xaxis=dict(title="Date"),yaxis=dict(title='Price'))
tcs_data=[{'x':tcs['Date'],'y':tcs['Close']}]
# NOTE(review): this assignment shadows the `plot` function imported from plotly.offline,
# so offline plot() is unusable after this cell — renaming to `fig` would be safer, but the
# next cell displays `plot`, so the name is kept here.
plot=go.Figure(data=tcs_data,layout=layout)
In [9]:
plot
Jul 2018Jan 2019Jul 2019Jan 2020Jul 2020Jan 2021Jul 2021Jan 2022Jul 2022Jan 2023150020002500300035004000
Stock Prices of TCSDatePrice
plotly-logomark
In [10]:
# Building the Regression Model
from sklearn.model_selection import train_test_split

#For Preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# For model evaluation and checking the accuracy
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
C:\Users\tanma\anaconda3\lib\site-packages\scipy\__init__.py:146: UserWarning:

A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.2

In [11]:
# Splitting the Data into Training Set and Testing Set
# x variable contains the independent features and y-variable has the dependent features
# Here the only feature is the row index (day number 0..1234); the target is the Close price.
x=np.array(tcs.index).reshape(-1,1)
y=tcs['Close']
# NOTE(review): train_test_split shuffles by default, which leaks future days into the
# training set — for a time series a chronological split (shuffle=False) is usually preferred.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=101)
In [12]:
#Feature Scaling for standardizing the dataset
# NOTE(review): `scaler` is fitted here but never applied — the model below is trained on
# the raw index values. Dead code unless transform() is called on x_train/x_test.
scaler=StandardScaler().fit(x_train)
In [13]:
from sklearn.linear_model import LinearRegression
In [14]:
# Fit an ordinary least-squares line: Close price as a linear function of the day index.
# fit() returns the estimator itself, so construction and fitting chain into one statement.
lm = LinearRegression().fit(x_train, y_train)
Out[14]:
LinearRegression()
In [15]:
# Plot Actual and Predicted Values for Train Dataset
trace0 = go.Scatter(x=x_train.T[0], y=y_train, mode="markers", name="Actual")
# lm.predict() already returns a 1-D array, so the original .T was a no-op — dropped
trace1 = go.Scatter(x=x_train.T[0], y=lm.predict(x_train), mode='lines', name='predicted')
tcs_data = [trace0, trace1]
# Build a fresh layout instead of mutating the shared `layout` object: the original
# in-place edit (layout.xaxis.title.text='Day') retroactively relabelled the x-axis
# of the first chart as well — a hidden-state bug on re-display.
layout2 = go.Layout(title='Stock Prices of TCS', xaxis=dict(title='Day'), yaxis=dict(title='Price'))
plot2 = go.Figure(data=tcs_data, layout=layout2)
    
In [16]:
# Display the train-set actual-vs-fitted figure (cell's last expression renders it)
plot2
020040060080010001200150020002500300035004000
ActualpredictedStock Prices of TCSDayPrice
plotly-logomark
In [17]:
# Evaluate the linear model on both splits.
# Compute each prediction vector once instead of four times, and fix the
# "Metrix" typo in the printed header.
train_pred = lm.predict(x_train)
test_pred = lm.predict(x_test)
score = f'''
{'Metric'.ljust(10)}{'train'.center(20)}{'test'.center(20)}
{'r2_score'.ljust(10)}{r2_score(y_train, train_pred)}\t{r2_score(y_test, test_pred)}
{'MSE'.ljust(10)}{mse(y_train, train_pred)}\t{mse(y_test, test_pred)}
'''
print(score)
Metrix           train                test        
r2_score  0.7700219820352993	0.7871548799849638
MSE       105159.11866541322	97951.27305330055

In [18]:
# Using LSTM Deep Learning for prediction
# LSTM layers model the sequential structure of the data;
# Dropout layers regularize the network during training
from keras.models import Sequential
from keras.layers import Dense,LSTM,Dropout
In [19]:
# Load the LSTM training split (dates are dd-mm-yyyy strings here, unlike TCSNS.csv)
data=pd.read_csv('TCS Training.csv')
data.head()  # preview the first 5 rows
Out[19]:
Date Open High Low Close Adj Close Volume
0 12-04-2018 1505.000000 1575.000000 1503.974976 1569.625000 1408.547119 6114360
1 13-04-2018 1575.000000 1620.400024 1566.050049 1576.650024 1414.850952 8179118
2 16-04-2018 1558.000000 1599.974976 1555.375000 1593.824951 1430.263306 6270274
3 17-04-2018 1593.824951 1597.474976 1573.599976 1583.300049 1420.818848 3301724
4 18-04-2018 1583.000000 1596.724976 1572.925049 1579.550049 1417.453369 2433066
In [20]:
# Check dtypes and non-null counts of the training split before cleaning
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 920 entries, 0 to 919
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       920 non-null    object 
 1   Open       920 non-null    float64
 2   High       920 non-null    float64
 3   Low        920 non-null    float64
 4   Close      920 non-null    float64
 5   Adj Close  920 non-null    float64
 6   Volume     920 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 50.4+ KB
In [21]:
# Coerce Close to numeric (unparseable entries become NaN), then drop any rows
# that failed to parse.
data['Close'] = pd.to_numeric(data.Close, errors='coerce')
data = data.dropna()
# Select the Close column by label rather than by position (was iloc[:, 4:5]) —
# robust to column reordering; the double brackets keep the 2-D (n, 1) shape
# that the MinMaxScaler below expects.
traindata = data[['Close']].values
In [22]:
# Re-check after cleaning: still 920 rows, so errors='coerce' produced no NaNs to drop
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 920 entries, 0 to 919
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       920 non-null    object 
 1   Open       920 non-null    float64
 2   High       920 non-null    float64
 3   Low        920 non-null    float64
 4   Close      920 non-null    float64
 5   Adj Close  920 non-null    float64
 6   Volume     920 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 50.4+ KB
In [23]:
# Scale closing prices into [0, 1]; the same fitted scaler is reused (transform only)
# on the test data and for inverse-transforming predictions later.
sc=MinMaxScaler(feature_range=(0,1))
traindata=sc.fit_transform(traindata)
In [24]:
traindata.shape  # (920, 1): one scaled Close value per trading day
Out[24]:
(920, 1)
In [25]:
# Build supervised windows: each sample x is the previous `timestep` scaled closes,
# its target y is the current day's scaled close.
timestep = 60  # look-back window length (was a magic number)
x_train = []
y_train = []

# Derive the upper bound from the data instead of hard-coding 920, so this cell
# keeps working if the training file changes length.
for i in range(timestep, len(traindata)):
    x_train.append(traindata[i - timestep:i, 0])
    y_train.append(traindata[i, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
In [26]:
# Add a trailing feature axis — (samples, timesteps, features=1) — the 3-D input
# shape Keras LSTM layers expect (the original comment called this the batch axis,
# but the batch dimension is x_train.shape[0], already present).
x_train=np.reshape(x_train,(x_train.shape[0],x_train.shape[1],1))
x_train.shape  # (860, 60, 1)
Out[26]:
(860, 60, 1)
In [27]:
# Stacked LSTM regressor: three sequence-returning LSTM layers, one final
# summarizing LSTM layer, each followed by 20% dropout, and a single-unit
# dense head that outputs the predicted (scaled) closing price.
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))

# Two identical hidden LSTM+Dropout stacks, added in a loop instead of copy-paste.
for _ in range(2):
    model.add(LSTM(units=100, return_sequences=True))
    model.add(Dropout(0.2))

model.add(LSTM(units=100, return_sequences=False))
model.add(Dropout(0.2))

model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
In [28]:
# Train for 20 epochs in batches of 32; verbose=2 prints one line per epoch.
# Keep the returned History object so the loss curve can be plotted below.
hist=model.fit(x_train,y_train,epochs=20,batch_size=32,verbose=2)
Epoch 1/20
27/27 - 12s - loss: 0.0333 - 12s/epoch - 461ms/step
Epoch 2/20
27/27 - 5s - loss: 0.0056 - 5s/epoch - 174ms/step
Epoch 3/20
27/27 - 5s - loss: 0.0046 - 5s/epoch - 170ms/step
Epoch 4/20
27/27 - 5s - loss: 0.0046 - 5s/epoch - 172ms/step
Epoch 5/20
27/27 - 5s - loss: 0.0036 - 5s/epoch - 167ms/step
Epoch 6/20
27/27 - 5s - loss: 0.0038 - 5s/epoch - 167ms/step
Epoch 7/20
27/27 - 6s - loss: 0.0032 - 6s/epoch - 232ms/step
Epoch 8/20
27/27 - 7s - loss: 0.0035 - 7s/epoch - 246ms/step
Epoch 9/20
27/27 - 6s - loss: 0.0034 - 6s/epoch - 239ms/step
Epoch 10/20
27/27 - 6s - loss: 0.0028 - 6s/epoch - 235ms/step
Epoch 11/20
27/27 - 6s - loss: 0.0027 - 6s/epoch - 238ms/step
Epoch 12/20
27/27 - 7s - loss: 0.0033 - 7s/epoch - 241ms/step
Epoch 13/20
27/27 - 6s - loss: 0.0038 - 6s/epoch - 238ms/step
Epoch 14/20
27/27 - 6s - loss: 0.0026 - 6s/epoch - 228ms/step
Epoch 15/20
27/27 - 6s - loss: 0.0023 - 6s/epoch - 235ms/step
Epoch 16/20
27/27 - 6s - loss: 0.0025 - 6s/epoch - 232ms/step
Epoch 17/20
27/27 - 7s - loss: 0.0026 - 7s/epoch - 258ms/step
Epoch 18/20
27/27 - 6s - loss: 0.0025 - 6s/epoch - 237ms/step
Epoch 19/20
27/27 - 6s - loss: 0.0022 - 6s/epoch - 229ms/step
Epoch 20/20
27/27 - 9s - loss: 0.0028 - 9s/epoch - 333ms/step
In [30]:
# Plot the per-epoch training loss recorded in the History object,
# using the explicit figure/axes interface.
fig, ax = plt.subplots()
ax.plot(hist.history['loss'])
ax.set_title('Training Model Loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train'], loc='upper right')
plt.show()
In [31]:
# Prepare the hold-out series exactly as the training data was prepared.
timestep = 60  # look-back window; defined before first use (was hard-coded as 60 below)
testdata = pd.read_csv("TCS Test.csv")
testdata['Close'] = pd.to_numeric(testdata.Close, errors="coerce")
testdata = testdata.dropna()
testdata = testdata.iloc[:, 4:5]  # keep only the Close column as a (n, 1) frame
# Targets begin after the first `timestep` rows — each window predicts the next close.
y_test = testdata.iloc[timestep:, 0:].values
# input array for the model, scaled with the scaler FITTED on the training data
inputclosing = testdata.iloc[:, 0:].values
inputclosing_scaled = sc.transform(inputclosing)
# (removed a stray no-op `inputclosing_scaled.shape` expression whose value was discarded)
x_test = []
length = len(testdata)
for i in range(timestep, length):
    x_test.append(inputclosing_scaled[i - timestep:i, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
x_test.shape  # (255, 60, 1)
Out[31]:
(255, 60, 1)
In [33]:
# Predict one scaled close per test window; outputs are in [0, 1] (scaler space)
y_pred=model.predict(x_test)
y_pred
8/8 [==============================] - 0s 51ms/step
Out[33]:
array([[0.9699101 ],
       [0.973404  ],
       [0.9759752 ],
       [0.977911  ],
       [0.9797219 ],
       [0.9813508 ],
       [0.98208266],
       [0.98133975],
       [0.9789395 ],
       [0.975015  ],
       [0.969714  ],
       [0.9624284 ],
       [0.9524859 ],
       [0.9403732 ],
       [0.9275488 ],
       [0.9154215 ],
       [0.90462357],
       [0.89546245],
       [0.8881281 ],
       [0.8828883 ],
       [0.87958294],
       [0.877889  ],
       [0.8770355 ],
       [0.8765748 ],
       [0.87576383],
       [0.87418604],
       [0.87172127],
       [0.8683422 ],
       [0.86418283],
       [0.85955626],
       [0.85462886],
       [0.85007197],
       [0.8464737 ],
       [0.8429838 ],
       [0.83893573],
       [0.8341906 ],
       [0.82871896],
       [0.82194096],
       [0.81389815],
       [0.80529624],
       [0.79764634],
       [0.79217285],
       [0.7894852 ],
       [0.79006684],
       [0.79401934],
       [0.80092007],
       [0.8094769 ],
       [0.8185993 ],
       [0.82755315],
       [0.8353694 ],
       [0.8403552 ],
       [0.8413437 ],
       [0.83794904],
       [0.830123  ],
       [0.8181102 ],
       [0.8028081 ],
       [0.7861381 ],
       [0.7700738 ],
       [0.75672954],
       [0.74742943],
       [0.74280244],
       [0.7427606 ],
       [0.7464735 ],
       [0.75263816],
       [0.7602676 ],
       [0.767986  ],
       [0.7745512 ],
       [0.77947086],
       [0.78284985],
       [0.78485763],
       [0.7847197 ],
       [0.78166276],
       [0.77517694],
       [0.7651311 ],
       [0.7519513 ],
       [0.7369655 ],
       [0.7217589 ],
       [0.7083135 ],
       [0.69820994],
       [0.6922533 ],
       [0.69047886],
       [0.6920504 ],
       [0.69631034],
       [0.70305675],
       [0.712215  ],
       [0.72345024],
       [0.73605317],
       [0.7494517 ],
       [0.7631193 ],
       [0.7765377 ],
       [0.78921896],
       [0.80057824],
       [0.81055534],
       [0.81902695],
       [0.82586414],
       [0.8310084 ],
       [0.83433837],
       [0.83586353],
       [0.8355451 ],
       [0.8329919 ],
       [0.82789356],
       [0.8200721 ],
       [0.80983394],
       [0.7972906 ],
       [0.7833617 ],
       [0.7686922 ],
       [0.75394124],
       [0.7398356 ],
       [0.7269997 ],
       [0.71607345],
       [0.7076485 ],
       [0.70238906],
       [0.7007466 ],
       [0.7026252 ],
       [0.7068117 ],
       [0.71184945],
       [0.7159389 ],
       [0.71794033],
       [0.7175327 ],
       [0.7146862 ],
       [0.7097985 ],
       [0.70334196],
       [0.69597405],
       [0.6885288 ],
       [0.6818617 ],
       [0.6763268 ],
       [0.6720869 ],
       [0.6690182 ],
       [0.667569  ],
       [0.6681475 ],
       [0.67059714],
       [0.67483693],
       [0.68026084],
       [0.6863399 ],
       [0.69259536],
       [0.69857925],
       [0.7039873 ],
       [0.70884603],
       [0.7130484 ],
       [0.7167121 ],
       [0.7198138 ],
       [0.72243875],
       [0.72467583],
       [0.72651905],
       [0.72796565],
       [0.72920257],
       [0.7308308 ],
       [0.733179  ],
       [0.7360474 ],
       [0.7391314 ],
       [0.74221903],
       [0.74501115],
       [0.74720675],
       [0.74932224],
       [0.75202984],
       [0.75571525],
       [0.7605637 ],
       [0.76642185],
       [0.772863  ],
       [0.7789638 ],
       [0.7841261 ],
       [0.7879712 ],
       [0.79085004],
       [0.7932705 ],
       [0.7956931 ],
       [0.798347  ],
       [0.8012447 ],
       [0.8048235 ],
       [0.8091127 ],
       [0.8138179 ],
       [0.8182611 ],
       [0.82181305],
       [0.82387507],
       [0.82377154],
       [0.8211821 ],
       [0.8165666 ],
       [0.8108821 ],
       [0.80472356],
       [0.79810655],
       [0.7907862 ],
       [0.7829917 ],
       [0.7753162 ],
       [0.7683371 ],
       [0.7623032 ],
       [0.75748414],
       [0.7540863 ],
       [0.7521809 ],
       [0.7517648 ],
       [0.7526571 ],
       [0.7545817 ],
       [0.7575614 ],
       [0.7616058 ],
       [0.76652974],
       [0.7713855 ],
       [0.77597415],
       [0.78008264],
       [0.7838417 ],
       [0.78743577],
       [0.7912203 ],
       [0.7951408 ],
       [0.79925555],
       [0.80362374],
       [0.80806065],
       [0.81222755],
       [0.81614935],
       [0.82002264],
       [0.82391626],
       [0.8276264 ],
       [0.8310506 ],
       [0.8335951 ],
       [0.8350968 ],
       [0.8359456 ],
       [0.8367332 ],
       [0.837761  ],
       [0.8391687 ],
       [0.8412708 ],
       [0.8443579 ],
       [0.8484457 ],
       [0.85294217],
       [0.85729426],
       [0.86122984],
       [0.8648446 ],
       [0.86789775],
       [0.87003833],
       [0.8708081 ],
       [0.8696478 ],
       [0.8664311 ],
       [0.86133724],
       [0.8543353 ],
       [0.8455391 ],
       [0.835825  ],
       [0.82580036],
       [0.8161387 ],
       [0.80762917],
       [0.8010116 ],
       [0.7963791 ],
       [0.7935011 ],
       [0.7917103 ],
       [0.78997433],
       [0.7874468 ],
       [0.78365636],
       [0.7785314 ],
       [0.7721208 ],
       [0.76448864],
       [0.7560913 ],
       [0.7474955 ],
       [0.73925704],
       [0.7318706 ],
       [0.7256574 ],
       [0.72094446],
       [0.7183368 ],
       [0.7181373 ],
       [0.72054404],
       [0.7252632 ],
       [0.7320195 ]], dtype=float32)
In [34]:
# Map scaled predictions back to the original price scale using the training scaler
predicted_price=sc.inverse_transform(y_pred)
In [35]:
# Overlay actual vs. predicted closing prices for the hold-out period,
# using the explicit figure/axes interface.
fig, ax = plt.subplots()
ax.plot(y_test, color='red', label='Actual Stock Price')
ax.plot(predicted_price, color='green', label='Predicted Stock Price')
ax.set_title("TCS Stock Price Prediction")
ax.set_xlabel("Time")
ax.set_ylabel("Stock Price")
ax.legend()
plt.show()